* Start from an empty session and do not pause output for --more-- prompts
clear all
set more off
* NOTE(review): memory is managed automatically in recent Stata releases, so the
* next setting is presumably a legacy leftover -- confirm before removing it
set mem 10000000
* Raise the matrix-size limit
set matsize 10000
* Interpret the remainder of this file with Stata 13 syntax
version 13

*********************************************************************** 
*** Build File to Process Raw Census 1991 PCA and merge to 2001 PCA ***
*********************************************************************** 
 
** Set file paths
* The global path_code must already be defined when this file is run.
* paths.do is presumably where the global pca (used for every data file below)
* gets defined -- confirm against paths.do
do "$path_code/paths.do"

********************************************************************************
********************************************************************************

** Process 1991 PCA (a district-level dataset)
{

* Load the raw 1991 district-level PCA and standardise its identifiers
{
use "$pca/pca_census91_st_dist.dta", clear
rename (dist_code91 dtname) (dt_code91 district)

* Rows with district code "00" hold the state name in the district field;
* copy that name to every district row sharing the same state code
generate state = district if dt_code91=="00"
order st_code91 dt_code91 state district
egen state_fill = mode(state), by(st_code91)
replace state = state_fill if state==""
drop state_fill code91

* Upper-case the names and collapse stray blanks so they can be string-matched
foreach nm of varlist state district {
	replace `nm' = upper(trim(itrim(`nm')))
}

* Discard the state-level ("00") rows, then give each remaining row an id
drop if dt_code91=="00"
duplicates drop
generate pca91_id = _n
order pca91_id
}


* Drop Total (= Rural + Urban) variables
* For every rural variable (suffix _r91) derive the names of its urban (_u91)
* and total (_t91) counterparts, verify total = rural + urban up to rounding,
* and then discard the redundant total.
* The wildcard requires the full _r91 suffix: a name that merely ended in r91
* would be left unchanged by subinstr(), so the loop would compare the variable
* with itself and then drop it.
foreach r of varlist *_r91 {
	local u = subinstr("`r'","_r91","_u91",1)
	local t = subinstr("`r'","_r91","_t91",1)
	assert abs(`t' - (`r' + `u')) < 0.1
	drop `t'
}
 

* Harmonise names and build population shares so the 1991 variables line up
* with the 2001/2011 PCA
{
foreach i in r u {
	* Sector suffix (rural or urban) and the three population denominators
	local sfx "_`i'91"
	local P tot_p`sfx'
	local M tot_m`sfx'
	local F tot_f`sfx'

	* Household and population counts take their 2001-style names
	rename (res_house`sfx' households`sfx' t_popln`sfx' t_m_popln`sfx' t_f_popln`sfx') ///
	       (no_hh_res`sfx' no_hh`sfx' tot_p`sfx' tot_m`sfx' tot_f`sfx')

	* Demographic and literacy shares
	gen pct_06`sfx' = (popln_m6`sfx' + popln_f6`sfx')/`P'
	gen pct_sc`sfx' = (m_sc`sfx' + f_sc`sfx')/`P'
	gen pct_st`sfx' = (m_st`sfx' + f_st`sfx')/`P'
	gen lit_p`sfx' = (m_literate`sfx' + f_literate`sfx')/`P'
	gen lit_m`sfx' = m_literate`sfx'/`M'
	gen lit_f`sfx' = f_literate`sfx'/`F'

	* All workers = main workers + marginal workers
	gen work_p`sfx' = (t_m_worker`sfx'+t_f_worker`sfx'+m_marginal`sfx'+f_marginal`sfx')/`P'
	foreach g in m f {
		gen work_`g'`sfx' = (t_`g'_worker`sfx'+`g'_marginal`sfx')/tot_`g'`sfx'
	}

	* Main workers only
	gen work_main_p`sfx' = (t_m_worker`sfx'+t_f_worker`sfx')/`P'
	foreach g in m f {
		gen work_main_`g'`sfx' = (t_`g'_worker`sfx')/tot_`g'`sfx'
	}

	* Marginal workers only
	gen work_marg_p`sfx' = (m_marginal`sfx'+f_marginal`sfx')/`P'
	foreach g in m f {
		gen work_marg_`g'`sfx' = (`g'_marginal`sfx')/tot_`g'`sfx'
	}

	* Cultivators (industrial category 1)
	gen work_pooled_cl_p`sfx' = (m_indcat1`sfx' + f_indcat1`sfx')/`P'
	foreach g in m f {
		gen work_pooled_cl_`g'`sfx' = (`g'_indcat1`sfx')/tot_`g'`sfx'
	}

	* Agricultural labourers (industrial category 2)
	gen work_pooled_al_p`sfx' = (m_indcat2`sfx' + f_indcat2`sfx')/`P'
	foreach g in m f {
		gen work_pooled_al_`g'`sfx' = (`g'_indcat2`sfx')/tot_`g'`sfx'
	}

	* Agricultural workers = cultivators + agricultural labourers
	foreach g in p m f {
		gen work_pooled_ag_`g'`sfx' = work_pooled_cl_`g'`sfx' + work_pooled_al_`g'`sfx'
	}

	* Household-industry workers (industrial category 5a)
	gen work_pooled_hh_p`sfx' = (m_indcat5a`sfx' + f_indcat5a`sfx')/`P'
	foreach g in m f {
		gen work_pooled_hh_`g'`sfx' = (`g'_indcat5a`sfx')/tot_`g'`sfx'
	}

	* Other workers = every remaining industrial category; rowtotal() counts a
	* missing category as zero
	foreach g in m f {
		egen temp_ot_`g' = rowtotal(`g'_indcat3`sfx' `g'_indcat4`sfx' `g'_indcat5b`sfx' `g'_indcat6`sfx' `g'_indcat7`sfx' `g'_indcat8`sfx' `g'_indcat9`sfx')
	}
	gen work_pooled_ot_p`sfx' = (temp_ot_m + temp_ot_f)/`P'
	foreach g in m f {
		gen work_pooled_ot_`g'`sfx' = (temp_ot_`g')/tot_`g'`sfx'
	}

	* Composite shares
	gen pct_scst`sfx' = pct_sc`sfx' + pct_st`sfx'
	gen pct_ag_al`sfx' = work_pooled_al_p`sfx'/work_pooled_ag_p`sfx'
	gen pct_work_ag`sfx' = work_pooled_ag_p`sfx'/work_p`sfx'
	gen pct_work_main`sfx' = work_main_p`sfx'/work_p`sfx'

	* Remove the raw counts that fed the shares, plus the scratch totals
	drop popln_?6`sfx' ?_sc`sfx' ?_st`sfx' ?_literate`sfx' t_?_worker`sfx' ?_marginal`sfx' *indcat*`sfx' temp* ?_non_work`sfx'
}

* Final suffix convention: rural variables end in _91, urban variables in _91u
rename *_r91 *_91
rename *_u91 *_91u
}

}

********************************************************************************
********************************************************************************

** Merge with 2001 districts
{

* First match states, dropping non-RGGVY states
{
* Keep the original 1991 state name before it is overwritten below
gen state91 = state

* NOTE(review): the spellings below (e.g. CHANDIGARGH, PONDICHEERY) presumably
* reproduce the raw 1991 file exactly -- confirm before "correcting" them
drop if inlist(state, "ANDAMAN & NICOBAR ISLANDS", "CHANDIGARGH", "DADRA & NAGAR HAVELI", "DAMAN & DIU", "DELHI", "GOA", "LAKSHADWEEP", "PONDICHEERY")

// JHARKHAND was carved out of BIHAR in 2000
// CHHATTISGARH was carved out of MADHYA PRADESH in 2000
// UTTARAKHAND was carved out of UTTAR PRADESH in 2000
// JAMMU & KASHMIR is not present in the 1991 PCA
* Every district of a parent state is copied (dupes==1) into the state carved
* out of it; copies that match no 2001 district are discarded after the
* district-level join further down
expand 2 if inlist(state,"BIHAR","MADHYA PRADESH","UTTAR PRADESH"), gen(dupes)
replace state = "JHARKHAND" if state=="BIHAR" & dupes==1
replace state = "CHHATTISGARH" if state=="MADHYA PRADESH" & dupes==1
replace state = "UTTARAKHAND" if state=="UTTAR PRADESH" & dupes==1

* Attach the 2001 state code. The 2001 names file has many rows per state, so
* it is first reduced to one row per state; isid stops the build if a state
* name ever maps to more than one code. This replaces a merge m:m, whose
* row-by-row pairing within state only gave the right answer because st_code
* is constant within state and the surplus rows were removed afterwards.
preserve
use state st_code using "$pca/pca_census01_names.dta", clear
duplicates drop
isid state
tempfile st_codes01
save `st_codes01'
restore
merge m:1 state using `st_codes01', keepusing(st_code) nogen

* The lookup adds a row for any 2001 state absent from the 1991 data
drop if state=="JAMMU & KASHMIR"
}


* Next match districts: rewrite 1991 spellings that differ from the 2001 names
{
* Keep the original 1991 district name before it is overwritten
gen district91 = district

* Himachal Pradesh
replace district = "LAHUL & SPITI" if state=="HIMACHAL PRADESH" & district=="LAHUL AND SPITI"

* Chhattisgarh
replace district = "SURGUJA" if state=="CHHATTISGARH" & district=="SARGUJA"

* Rajasthan
replace district = "CHITTAURGARH" if state=="RAJASTHAN" & district=="CHITTORGARH"
replace district = "DHAULPUR" if state=="RAJASTHAN" & district=="DHOLPUR"
replace district = "JALOR" if state=="RAJASTHAN" & district=="JALAUR"
replace district = "JHUNJHUNUN" if state=="RAJASTHAN" & district=="JHUNJHUNU"

* Uttar Pradesh
replace district = "BULANDSHAHAR" if state=="UTTAR PRADESH" & district=="BULANDSHAHR"
replace district = "MAHRAJGANJ" if state=="UTTAR PRADESH" & district=="MAHARAJGANJ"
replace district = "SHAHJAHANPUR" if state=="UTTAR PRADESH" & district=="SHAHJEHANPUR"
replace district = "SIDDHARTHNAGAR" if state=="UTTAR PRADESH" & district=="SIDDHARTH NAGAR"

* Sikkim: strip the trailing word DISTRICT from the four compass names
foreach d in EAST NORTH SOUTH WEST {
	replace district = "`d'" if state=="SIKKIM" & district=="`d' DISTRICT"
}

* West Bengal
replace district = "HAORA" if state=="WEST BENGAL" & district=="HOWRAH"
foreach side in NORTH SOUTH {
	replace district = "`side' TWENTY FOUR PARGANAS" if state=="WEST BENGAL" & district=="`side' 24 PARAGANAS"
}

* Jharkhand
replace district = "HAZARIBAGH" if state=="JHARKHAND" & district=="HAZARIBAG"

* Gujarat
replace district = "MAHESANA" if state=="GUJARAT" & district=="MAHASANA"

* Maharashtra
replace district = "AHMADNAGAR" if state=="MAHARASHTRA" & district=="AHMEDNAGAR"

* Andhra Pradesh
replace district = "VISAKHAPATNAM" if state=="ANDHRA PRADESH" & district=="VISAKHAPATANAM"
replace district = "NIZAMABAD" if state=="ANDHRA PRADESH" & district=="NNIZAMABAD"

* Karnataka
replace district = "DAKSHINA KANNADA" if state=="KARNATAKA" & district=="DAKSHIN KANNAD"
replace district = "UTTARA KANNADA" if state=="KARNATAKA" & district=="UTTAR KANNAD"

* Tamil Nadu
replace district = "DINDIGUL" if state=="TAMIL NADU" & district=="DINDIGUL ANNA"
replace district = "PUDUKKOTTAI" if state=="TAMIL NADU" & district=="PUDDUKKOTTAI"
replace district = "TIRUCHIRAPPALLI" if state=="TAMIL NADU" & district=="TIRUCHIRAPALLI"
replace district = "TIRUNELVELI" if state=="TAMIL NADU" & district=="TIRUNELVELI-KATTABOMMAN"
replace district = "TIRUVANNAMALAI" if state=="TAMIL NADU" & district=="TIRUVANNAMALAI-SAMBUVARA."
replace district = "THE NILGIRIS" if state=="TAMIL NADU" & district=="NILGIRI"
}


* Next split 1991 districts and assign to 2001 carve-out district (http://www.cdedse.org/pdf/work176.pdf)
{
  // How every section below works:
  //   expand 2 ..., gen(flag)  adds one copy of each listed 1991 district row;
  //                            the copy has flag==1, all other rows flag==0.
  //   replace ... flag==1      renames the copy to a 2001 carve-out district.
  //   replace ... flag==0      (a few cases) also renames the original row,
  //                            when the 1991 name itself is replaced.
  // A parent with several carve-outs is expanded again: after each round only
  // the original row still carries the parent name, so each round adds exactly
  // one more copy. Copies keep the parent's 1991 values unchanged.

  // ARUNACHAL PRADESH
expand 2 if state=="ARUNACHAL PRADESH" & inlist(district,"EAST SIANG","LOWER SUBANSIRI"), gen(dupesARUN)
replace district = "UPPER SIANG" if district=="EAST SIANG" & state=="ARUNACHAL PRADESH" & dupesARUN==1
replace district = "PAPUM PARE" if district=="LOWER SUBANSIRI" & state=="ARUNACHAL PRADESH" & dupesARUN==1

  // BIHAR (MUNGER gets three carve-outs, hence the two extra rounds)
expand 2 if state=="BIHAR" & inlist(district,"BHAGALPUR","BHOJPUR","ROHTAS","SITAMARHI","SAHARSA","MUNGER"), gen(dupesBIH)
replace district = "BANKA" if district=="BHAGALPUR" & state=="BIHAR" & dupesBIH==1
replace district = "BUXAR" if district=="BHOJPUR" & state=="BIHAR" & dupesBIH==1
replace district = "KAIMUR (BHABUA)" if district=="ROHTAS" & state=="BIHAR" & dupesBIH==1
replace district = "SHEOHAR" if district=="SITAMARHI" & state=="BIHAR" & dupesBIH==1
replace district = "SUPAUL" if district=="SAHARSA" & state=="BIHAR" & dupesBIH==1
replace district = "JAMUI" if district=="MUNGER" & state=="BIHAR" & dupesBIH==1
expand 2 if state=="BIHAR" & inlist(district,"MUNGER"), gen(dupesBIH2)
replace district = "LAKHISARAI" if district=="MUNGER" & state=="BIHAR" & dupesBIH2==1
expand 2 if state=="BIHAR" & inlist(district,"MUNGER"), gen(dupesBIH3)
replace district = "SHEIKHPURA" if district=="MUNGER" & state=="BIHAR" & dupesBIH3==1

  // CHHATTISGARH (RAIPUR, BASTAR and BILASPUR each get a second carve-out)
expand 2 if state=="CHHATTISGARH" & inlist(district,"SURGUJA","RAIGARH","RAJNANDGAON","RAIPUR","BASTAR","BILASPUR"), gen(dupesCHH)
replace district = "KORIYA" if district=="SURGUJA" & state=="CHHATTISGARH" & dupesCHH==1
replace district = "JASHPUR" if district=="RAIGARH" & state=="CHHATTISGARH" & dupesCHH==1
replace district = "KAWARDHA" if district=="RAJNANDGAON" & state=="CHHATTISGARH" & dupesCHH==1
replace district = "DHAMTARI" if district=="RAIPUR" & state=="CHHATTISGARH" & dupesCHH==1
replace district = "DANTEWADA" if district=="BASTAR" & state=="CHHATTISGARH" & dupesCHH==1
replace district = "KORBA" if district=="BILASPUR" & state=="CHHATTISGARH" & dupesCHH==1
expand 2 if state=="CHHATTISGARH" & inlist(district,"RAIPUR","BASTAR","BILASPUR"), gen(dupesCHH2)
replace district = "MAHASAMUND" if district=="RAIPUR" & state=="CHHATTISGARH" & dupesCHH2==1
replace district = "KANKER" if district=="BASTAR" & state=="CHHATTISGARH" & dupesCHH2==1
replace district = "JANJGIR - CHAMPA" if district=="BILASPUR" & state=="CHHATTISGARH" & dupesCHH2==1

  // GUJARAT
expand 2 if state=="GUJARAT" & inlist(district,"PANCH MAHALS","VALSAD","KHEDA","BHARUCH","MAHESANA","JUNAGADH"), gen(dupesGUJ)
replace district = "DOHAD" if district=="PANCH MAHALS" & state=="GUJARAT" & dupesGUJ==1
replace district = "NAVSARI" if district=="VALSAD" & state=="GUJARAT" & dupesGUJ==1
replace district = "ANAND" if district=="KHEDA" & state=="GUJARAT" & dupesGUJ==1
replace district = "NARMADA" if district=="BHARUCH" & state=="GUJARAT" & dupesGUJ==1
replace district = "PATAN" if district=="MAHESANA" & state=="GUJARAT" & dupesGUJ==1
replace district = "PORBANDAR" if district=="JUNAGADH" & state=="GUJARAT" & dupesGUJ==1

  // HARYANA
expand 2 if state=="HARYANA" & inlist(district,"HISAR","ROHTAK","AMBALA"), gen(dupesHAR)
replace district = "FATEHABAD" if district=="HISAR" & state=="HARYANA" & dupesHAR==1
replace district = "JHAJJAR" if district=="ROHTAK" & state=="HARYANA" & dupesHAR==1
replace district = "PANCHKULA" if district=="AMBALA" & state=="HARYANA" & dupesHAR==1

  // JHARKHAND (HAZARIBAGH gets a second carve-out)
expand 2 if state=="JHARKHAND" & inlist(district,"GIRIDIH","HAZARIBAGH","PALAMU","SAHIBGANJ"), gen(dupesJHA)
replace district = "BOKARO" if district=="GIRIDIH" & state=="JHARKHAND" & dupesJHA==1
replace district = "CHATRA" if district=="HAZARIBAGH" & state=="JHARKHAND" & dupesJHA==1
replace district = "GARHWA" if district=="PALAMU" & state=="JHARKHAND" & dupesJHA==1
replace district = "PAKAUR" if district=="SAHIBGANJ" & state=="JHARKHAND" & dupesJHA==1
expand 2 if state=="JHARKHAND" & inlist(district,"HAZARIBAGH"), gen(dupesJHA2)
replace district = "KODARMA" if district=="HAZARIBAGH" & state=="JHARKHAND" & dupesJHA2==1

  // KARNATAKA (DHARWAD gets a second carve-out)
expand 2 if state=="KARNATAKA" & inlist(district,"BIJAPUR","MYSORE","CHITRADURGA","DAKSHINA KANNADA","RAICHUR","DHARWAD"), gen(dupesKAR)
replace district = "BAGALKOT" if district=="BIJAPUR" & state=="KARNATAKA" & dupesKAR==1
replace district = "CHAMARAJANAGAR" if district=="MYSORE" & state=="KARNATAKA" & dupesKAR==1
replace district = "DAVANAGERE" if district=="CHITRADURGA" & state=="KARNATAKA" & dupesKAR==1
replace district = "KOPPAL" if district=="RAICHUR" & state=="KARNATAKA" & dupesKAR==1
replace district = "UDUPI" if district=="DAKSHINA KANNADA" & state=="KARNATAKA" & dupesKAR==1
replace district = "GADAG" if district=="DHARWAD" & state=="KARNATAKA" & dupesKAR==1
expand 2 if state=="KARNATAKA" & inlist(district,"DHARWAD"), gen(dupesKAR2)
replace district = "HAVERI" if district=="DHARWAD" & state=="KARNATAKA" & dupesKAR2==1

  // MAHARASHTRA
expand 2 if state=="MAHARASHTRA" & inlist(district,"BHANDARA","PARBHANI","DHULE","AKOLA"), gen(dupesMAH)
replace district = "GONDIYA" if district=="BHANDARA" & state=="MAHARASHTRA" & dupesMAH==1
replace district = "HINGOLI" if district=="PARBHANI" & state=="MAHARASHTRA" & dupesMAH==1
replace district = "NANDURBAR" if district=="DHULE" & state=="MAHARASHTRA" & dupesMAH==1
replace district = "WASHIM" if district=="AKOLA" & state=="MAHARASHTRA" & dupesMAH==1

  // MADHYA PRADESH
expand 2 if state=="MADHYA PRADESH" & inlist(district,"WEST NIMAR","MANDLA","HOSHANGABAD","JABALPUR","MANDSAUR","MORENA","SHAHDOL"), gen(dupesMP)
replace district = "BARWANI" if district=="WEST NIMAR" & state=="MADHYA PRADESH" & dupesMP==1
replace district = "DINDORI" if district=="MANDLA" & state=="MADHYA PRADESH" & dupesMP==1
replace district = "HARDA" if district=="HOSHANGABAD" & state=="MADHYA PRADESH" & dupesMP==1
replace district = "KATNI" if district=="JABALPUR" & state=="MADHYA PRADESH" & dupesMP==1
replace district = "NEEMUCH" if district=="MANDSAUR" & state=="MADHYA PRADESH" & dupesMP==1
replace district = "SHEOPUR" if district=="MORENA" & state=="MADHYA PRADESH" & dupesMP==1
replace district = "UMARIA" if district=="SHAHDOL" & state=="MADHYA PRADESH" & dupesMP==1

  // MANIPUR (IMPHAL is renamed on both rows; the original SENAPATI row keeps its name)
expand 2 if state=="MANIPUR" & inlist(district,"IMPHAL","SENAPATI"), gen(dupesMAN)
replace district = "IMPHAL EAST" if district=="IMPHAL" & state=="MANIPUR" & dupesMAN==0
replace district = "IMPHAL WEST" if district=="IMPHAL" & state=="MANIPUR" & dupesMAN==1
replace district = "SENAPATI (EXCLUDING 3 SUB-DIVISIONS)" if district=="SENAPATI" & state=="MANIPUR" & dupesMAN==1

  // MEGHALAYA
expand 2 if state=="MEGHALAYA" & inlist(district,"WEST GARO HILLS","EAST KHASI HILLS"), gen(dupesMEG)
replace district = "RI BHOI" if district=="EAST KHASI HILLS" & state=="MEGHALAYA" & dupesMEG==1
replace district = "SOUTH GARO HILLS" if district=="WEST GARO HILLS" & state=="MEGHALAYA" & dupesMEG==1

  // MIZORAM (CHHIMTUIPUI is renamed on both rows; AIZAWL gets four carve-outs)
expand 2 if state=="MIZORAM" & inlist(district,"AIZAWL","CHHIMTUIPUI"), gen(dupesMIZ)
replace district = "CHAMPHAI" if district=="AIZAWL" & state=="MIZORAM" & dupesMIZ==1
replace district = "LAWNGTLAI" if district=="CHHIMTUIPUI" & state=="MIZORAM" & dupesMIZ==0
replace district = "SAIHA" if district=="CHHIMTUIPUI" & state=="MIZORAM" & dupesMIZ==1
expand 2 if state=="MIZORAM" & inlist(district,"AIZAWL"), gen(dupesMIZ2)
replace district = "KOLASIB" if district=="AIZAWL" & state=="MIZORAM" & dupesMIZ2==1
expand 2 if state=="MIZORAM" & inlist(district,"AIZAWL"), gen(dupesMIZ3)
replace district = "MAMIT" if district=="AIZAWL" & state=="MIZORAM" & dupesMIZ3==1
expand 2 if state=="MIZORAM" & inlist(district,"AIZAWL"), gen(dupesMIZ4)
replace district = "SERCHHIP" if district=="AIZAWL" & state=="MIZORAM" & dupesMIZ4==1

  // NAGALAND
expand 2 if state=="NAGALAND" & inlist(district,"KOHIMA"), gen(dupesNAG)
replace district = "DIMAPUR" if district=="KOHIMA" & state=="NAGALAND" & dupesNAG==1

  // ORISSA (the district list is split over two inlist() calls; PHULBANI is
  // renamed on both rows; SAMBALPUR, CUTTACK, KORAPUT and PURI get further rounds)
expand 2 if state=="ORISSA" & (inlist(district,"DHENKANAL","SAMBALPUR","PHULBANI","BALESHWAR","GANJAM") | inlist(district,"CUTTACK","PURI","KORAPUT","KALAHANDI","BALANGIR")), gen(dupesOR)
replace district = "ANUGUL" if district=="DHENKANAL" & state=="ORISSA" & dupesOR==1
replace district = "BARGARH" if district=="SAMBALPUR" & state=="ORISSA" & dupesOR==1
replace district = "BAUDH" if district=="PHULBANI" & state=="ORISSA" & dupesOR==0
replace district = "BHADRAK" if district=="BALESHWAR" & state=="ORISSA" & dupesOR==1
replace district = "GAJAPATI" if district=="GANJAM" & state=="ORISSA" & dupesOR==1
replace district = "JAGATSINGHAPUR" if district=="CUTTACK" & state=="ORISSA" & dupesOR==1
replace district = "KANDHAMAL" if district=="PHULBANI" & state=="ORISSA" & dupesOR==1
replace district = "KHORDHA" if district=="PURI" & state=="ORISSA" & dupesOR==1
replace district = "MALKANGIRI" if district=="KORAPUT" & state=="ORISSA" & dupesOR==1
replace district = "NUAPADA" if district=="KALAHANDI" & state=="ORISSA" & dupesOR==1
replace district = "SONAPUR" if district=="BALANGIR" & state=="ORISSA" & dupesOR==1
expand 2 if state=="ORISSA" & inlist(district,"SAMBALPUR","CUTTACK","KORAPUT","PURI"), gen(dupesOR2)
replace district = "JHARSUGUDA" if district=="SAMBALPUR" & state=="ORISSA" & dupesOR2==1
replace district = "JAJAPUR" if district=="CUTTACK" & state=="ORISSA" & dupesOR2==1
replace district = "NAYAGARH" if district=="PURI" & state=="ORISSA" & dupesOR2==1
replace district = "NABARANGAPUR" if district=="KORAPUT" & state=="ORISSA" & dupesOR2==1
expand 2 if state=="ORISSA" & inlist(district,"SAMBALPUR","CUTTACK","KORAPUT"), gen(dupesOR3)
replace district = "DEBAGARH" if district=="SAMBALPUR" & state=="ORISSA" & dupesOR3==1
replace district = "KENDRAPARA" if district=="CUTTACK" & state=="ORISSA" & dupesOR3==1
replace district = "RAYAGADA" if district=="KORAPUT" & state=="ORISSA" & dupesOR3==1

  // PUNJAB (FARIDKOT gets a second carve-out)
expand 2 if state=="PUNJAB" & inlist(district,"PATIALA","BATHINDA","FARIDKOT","JALANDHAR"), gen(dupesPUN)
replace district = "FATEHGARH SAHIB" if district=="PATIALA" & state=="PUNJAB" & dupesPUN==1
replace district = "MANSA" if district=="BATHINDA" & state=="PUNJAB" & dupesPUN==1
replace district = "MOGA" if district=="FARIDKOT" & state=="PUNJAB" & dupesPUN==1
replace district = "NAWANSHAHR" if district=="JALANDHAR" & state=="PUNJAB" & dupesPUN==1
expand 2 if state=="PUNJAB" & inlist(district,"FARIDKOT"), gen(dupesPUN2)
replace district = "MUKTSAR" if district=="FARIDKOT" & state=="PUNJAB" & dupesPUN2==1

  // RAJASTHAN
expand 2 if state=="RAJASTHAN" & inlist(district,"KOTA","JAIPUR","GANGANAGAR","SAWAI MADHOPUR","UDAIPUR"), gen(dupesRAJ)
replace district = "BARAN" if district=="KOTA" & state=="RAJASTHAN" & dupesRAJ==1
replace district = "DAUSA" if district=="JAIPUR" & state=="RAJASTHAN" & dupesRAJ==1
replace district = "HANUMANGARH" if district=="GANGANAGAR" & state=="RAJASTHAN" & dupesRAJ==1
replace district = "KARAULI" if district=="SAWAI MADHOPUR" & state=="RAJASTHAN" & dupesRAJ==1
replace district = "RAJSAMAND" if district=="UDAIPUR" & state=="RAJASTHAN" & dupesRAJ==1

  // TAMIL NADU (the first five lines are plain renames with no split; SOUTH ARCOT
  // and CHENGALPATTU-MGR are renamed on both rows; TIRUCHIRAPPALLI and THANJAVUR
  // get further rounds)
replace district = "ERODE" if district=="PERIYAR" & state=="TAMIL NADU"
replace district = "SIVAGANGA" if district=="PASUMPON M. THEVAR" & state=="TAMIL NADU"
replace district = "THOOTHUKKUDI" if district=="CHIDAMBARANAR" & state=="TAMIL NADU"
replace district = "VELLORE" if district=="NORTH ARCOT-AMBEDKAR" & state=="TAMIL NADU"
replace district = "VIRUDHUNAGAR" if district=="KAMARAJAR" & state=="TAMIL NADU"
expand 2 if state=="TAMIL NADU" & inlist(district,"TIRUCHIRAPPALLI","SOUTH ARCOT","CHENGALPATTU-MGR","THANJAVUR","SALEM","MADURAI"), gen(dupesTN)
replace district = "ARIYALUR" if district=="TIRUCHIRAPPALLI" & state=="TAMIL NADU" & dupesTN==1
replace district = "CUDDALORE" if district=="SOUTH ARCOT" & state=="TAMIL NADU" & dupesTN==0
replace district = "KANCHEEPURAM" if district=="CHENGALPATTU-MGR" & state=="TAMIL NADU" & dupesTN==0
replace district = "NAGAPATTINAM" if district=="THANJAVUR" & state=="TAMIL NADU" & dupesTN==1
replace district = "NAMAKKAL" if district=="SALEM" & state=="TAMIL NADU" & dupesTN==1
replace district = "THENI" if district=="MADURAI" & state=="TAMIL NADU" & dupesTN==1
replace district = "THIRUVALLUR" if district=="CHENGALPATTU-MGR" & state=="TAMIL NADU" & dupesTN==1
replace district = "VILUPPURAM" if district=="SOUTH ARCOT" & state=="TAMIL NADU" & dupesTN==1
expand 2 if state=="TAMIL NADU" & inlist(district,"TIRUCHIRAPPALLI","THANJAVUR"), gen(dupesTN2)
replace district = "KARUR" if district=="TIRUCHIRAPPALLI" & state=="TAMIL NADU" & dupesTN2==1
replace district = "THIRUVARUR" if district=="THANJAVUR" & state=="TAMIL NADU" & dupesTN2==1
expand 2 if state=="TAMIL NADU" & inlist(district,"TIRUCHIRAPPALLI"), gen(dupesTN3)
replace district = "PERAMBALUR" if district=="TIRUCHIRAPPALLI" & state=="TAMIL NADU" & dupesTN3==1

  // TRIPURA
expand 2 if state=="TRIPURA" & inlist(district,"NORTH TRIPURA"), gen(dupesTRI)
replace district = "DHALAI" if district=="NORTH TRIPURA" & state=="TRIPURA" & dupesTRI==1

  // UTTARAKHAND
expand 2 if state=="UTTARAKHAND" & inlist(district,"ALMORA","PITHORAGARH","CHAMOLI","NAINITAL"), gen(dupesUK)
replace district = "BAGESHWAR" if district=="ALMORA" & state=="UTTARAKHAND" & dupesUK==1
replace district = "CHAMPAWAT" if district=="PITHORAGARH" & state=="UTTARAKHAND" & dupesUK==1
replace district = "RUDRAPRAYAG" if district=="CHAMOLI" & state=="UTTARAKHAND" & dupesUK==1
replace district = "UDHAM SINGH NAGAR" if district=="NAINITAL" & state=="UTTARAKHAND" & dupesUK==1

  // UTTAR PRADESH
  // expand 2 adds one copy (flag==1) of each listed 1991 district row, and the
  // copy is renamed to a 2001 carve-out district. The district list is split
  // over two inlist() calls.
expand 2 if state=="UTTAR PRADESH" & (inlist(district,"MORADABAD","FARRUKHABAD","ALLAHABAD","HAMIRPUR","BANDA","BAHRAICH") | inlist(district,"GONDA","DEORIA","VARANASI","ALIGARH","FAIZABAD","ETAWAH","MEERUT","GHAZIABAD","BASTI")), gen(dupesUP)
replace district = "JYOTIBA PHULE NAGAR" if district=="MORADABAD" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "KANNAUJ" if district=="FARRUKHABAD" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "KAUSHAMBI" if district=="ALLAHABAD" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "MAHOBA" if district=="HAMIRPUR" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "CHITRAKOOT" if district=="BANDA" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "SHRAWASTI" if district=="BAHRAICH" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "BALRAMPUR" if district=="GONDA" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "KUSHINAGAR" if district=="DEORIA" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "CHANDAULI" if district=="VARANASI" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "HATHRAS" if district=="ALIGARH" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "AMBEDKAR NAGAR" if district=="FAIZABAD" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "AURAIYA" if district=="ETAWAH" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "BAGHPAT" if district=="MEERUT" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "GAUTAM BUDDHA NAGAR" if district=="GHAZIABAD" & state=="UTTAR PRADESH" & dupesUP==1
replace district = "SANT KABIR NAGAR" if district=="BASTI" & state=="UTTAR PRADESH" & dupesUP==1
  // Second carve-out from VARANASI: only the original row still carries that
  // name, so exactly one more copy is added. The condition tests the single
  // name directly; the earlier inlist() also listed two empty strings, which
  // would have duplicated any Uttar Pradesh row with a blank district name.
expand 2 if state=="UTTAR PRADESH" & district=="VARANASI", gen(dupesUP2)
replace district = "SANT RAVIDAS NAGAR BHADOHI" if district=="VARANASI" & state=="UTTAR PRADESH" & dupesUP2==1

  // WEST BENGAL (WEST DINAJPUR is renamed on both the original row and the copy)
expand 2 if state=="WEST BENGAL" & inlist(district,"WEST DINAJPUR"), gen(dupesWB)
replace district = "DAKSHIN DINAJPUR" if district=="WEST DINAJPUR" & state=="WEST BENGAL" & dupesWB==0
replace district = "UTTAR DINAJPUR" if district=="WEST DINAJPUR" & state=="WEST BENGAL" & dupesWB==1
}


* Join to the 2001 PCA names file on (state code, district name) to pick up the
* 2001 district codes
{
joinby st_code district using "$pca/pca_census01_names.dta", unmatched(both)

* Only the district-level identifiers are wanted from the names file; drop its
* remaining columns and collapse the repeated rows this leaves behind
drop pca01_id bk_code_pca block vi_code village no_hh tot_p tot_m tot_f
duplicates drop
drop if st_code==1 // J&K missing from 1991 PCA

sort st_code district _merge
unique pca91_id

* Best join status reached by any row of each 1991 district: every district
* with rural population must have matched a 2001 district at least once
egen max_merge = max(_merge), by(pca91_id)
assert max_merge==3 | tot_p_91==0

* Keep each district's best-status rows, except rows found only in the 1991 data
keep if _merge==max_merge & _merge!=1
unique pca91_id
assert _merge==3
drop _merge dupes* max_merge
}


}


********************************************************************************
********************************************************************************

** Clean up, label, and save
{
* Label PCA population and labor variables
* Rural variables end in _91 and urban ones in _91u; the labels are otherwise
* identical, so both sets are produced by one pass per sector.
* The spelling "cultivators" is corrected in the six cultivator labels.
{
foreach sector in rural urban {
	* sfx is empty for rural and "u" for urban
	local sfx
	if "`sector'"=="urban" local sfx u

	* Counts
	label variable area_91`sfx'             "District area (`sector')"
	label variable no_hh_res_91`sfx'        "No. occupied residences (`sector')"
	label variable no_hh_91`sfx'            "No. households (`sector')"
	label variable tot_p_91`sfx'            "1991 district total population (`sector')"
	label variable tot_m_91`sfx'            "1991 district male population (`sector')"
	label variable tot_f_91`sfx'            "1991 district female population (`sector')"

	* Demographics and literacy
	label variable pct_06_91`sfx'           "% pop 0-6 years old (`sector')"
	label variable pct_sc_91`sfx'           "% pop scheduled caste (`sector')"
	label variable pct_st_91`sfx'           "% pop scheduled tribe (`sector')"
	label variable lit_p_91`sfx'            "% pop literate (`sector')"
	label variable lit_m_91`sfx'            "% male pop literate (`sector')"
	label variable lit_f_91`sfx'            "% female pop literate (`sector')"

	* Workforce participation
	label variable work_p_91`sfx'           "% pop workers (`sector')"
	label variable work_m_91`sfx'           "% male pop workers (`sector')"
	label variable work_f_91`sfx'           "% female pop workers (`sector')"
	label variable work_main_p_91`sfx'      "% pop main workers (`sector')"
	label variable work_main_m_91`sfx'      "% male pop main workers (`sector')"
	label variable work_main_f_91`sfx'      "% female pop main workers (`sector')"
	label variable work_marg_p_91`sfx'      "% pop marg workers (`sector')"
	label variable work_marg_m_91`sfx'      "% male pop marg workers (`sector')"
	label variable work_marg_f_91`sfx'      "% female pop marg workers (`sector')"

	* Workers by pooled industrial category
	label variable work_pooled_cl_p_91`sfx' "% pop cultivators (`sector')"
	label variable work_pooled_cl_m_91`sfx' "% male pop cultivators (`sector')"
	label variable work_pooled_cl_f_91`sfx' "% female pop cultivators (`sector')"
	label variable work_pooled_al_p_91`sfx' "% pop agri-laborers (`sector')"
	label variable work_pooled_al_m_91`sfx' "% male pop agri-laborers (`sector')"
	label variable work_pooled_al_f_91`sfx' "% female pop agri-laborers (`sector')"
	label variable work_pooled_ag_p_91`sfx' "% pop ag workers (`sector')"
	label variable work_pooled_ag_m_91`sfx' "% male pop ag workers (`sector')"
	label variable work_pooled_ag_f_91`sfx' "% female pop ag workers (`sector')"
	label variable work_pooled_hh_p_91`sfx' "% pop household industry workers (`sector')"
	label variable work_pooled_hh_m_91`sfx' "% male pop household industry workers (`sector')"
	label variable work_pooled_hh_f_91`sfx' "% female pop household industry workers (`sector')"
	label variable work_pooled_ot_p_91`sfx' "% pop other workers (`sector')"
	label variable work_pooled_ot_m_91`sfx' "% male pop other workers (`sector')"
	label variable work_pooled_ot_f_91`sfx' "% female pop other workers (`sector')"

	* Composite shares
	label variable pct_scst_91`sfx'         "% pop SC or ST (`sector')"
	label variable pct_ag_al_91`sfx'        "% of ag workers that are agri-laborers (`sector')"
	label variable pct_work_ag_91`sfx'      "% of workers that are ag workers (`sector')"
	label variable pct_work_main_91`sfx'    "% of workers that are main workers (`sector')"
}
}


* Label the identifier variables, tidy up, and write the final dataset
{
label variable pca91_id "1991 PCA district id (non-unique after 2001 district merge!)"

* The 1991 identifiers carry a 91 suffix; the 2001 identifiers have none
foreach yr in 1991 2001 {
	local s
	if `yr'==1991 local s 91
	label variable st_code`s'  "`yr' state code"
	label variable dt_code`s'  "`yr' district code"
	label variable state`s'    "`yr' state name"
	label variable district`s' "`yr' district name"
}

* Identifiers first: 1991 ids and names, then their 2001 counterparts
order pca91_id st_code91 dt_code91 state91 district91 st_code dt_code state district

* Convert the 1991 codes from strings to numbers
destring st_code91 dt_code91, replace

* Report how many distinct 2001 districts remain, drop exact duplicate rows,
* shrink storage types and save
unique st_code dt_code
duplicates drop
compress
save "$pca/pca_census91.dta", replace

}

}

********************************************************************************
********************************************************************************
